### Performs the individual level analysis (with matching, etc)
### Loads the extract of the survey data (2014 BEPS)
### Analysis is done for two waves of the BEPS 2014 survey
### One pre-electoral and another post-electoral
### For each, we first perform the matching and check balance, 
### Then analyze the matched data set using serveral different models
library(Zelig)
library(ZeligChoice)
library(MatchIt)
library(car)
library(RItools)
library(xtable)
#setwd("~/pathwheredatafilesare")
load("data_replic2014beps.RData") 
	#this is an extract of the full individual level dataset
	#merged with municipality level data

######################################################################
####										     				  ####
#### Matching and balance anlaysis	(using WAVE F vote recall)    ####
#### 	                                            			  ####
######################################################################

### Use post-election vote intention and drop pre-election from the set
tmp <- new.data
tmp$dilma <- tmp$dilma.post
tmp$dilma.pre <- NULL
tmp <- na.omit(tmp) #use only non missing observations
cat("Analysis on post electoral recall using",nrow(tmp),
    "of the original",nrow(new.data),"observations\n")

### Compute propensity score
### This is not really necessary, as matchit uses PS by default, but...
tmp$ps <- ps <- fitted(lm(bf~sexo+idade+escola+renda+
			metropolitan+race+region+munsize,data=tmp))
			
match.formula <- formula("bf~sexo+idade+escola+renda+metropolitan+race+region+munsize+ps")
matching <- matchit(match.formula
                   ,data=tmp,method="genetic",replace=T,ratio=1,pop.size=125) 
                   
                   
### Plot matching reuslts #########
pdf(file="figure-balimprove2014BEPS-post.pdf")
plot(summary(matching,standardize=TRUE),interactive=F)
dev.off()

### Compute and save matching statistics ###
match.stats <- cbind(
	summary(matching)$sum.all[,1:2],
	summary(matching)$sum.matched[,1:2],
	Reduction=summary(matching)$reduction[,1]
)   

#Matching was done with replacement, so weights are different for eaach observation
#xBalance doesn't take weights, so use work-arournd to compute Bowler&Hansosn stats
forbal <- rbind(tmp[matching$match.matrix[,1],],	
			tmp[rownames(matching$match.matrix),])
bal<-xBalance(match.formula,data=forbal,report="all")
cat("Bowler and Hanson omnibus test\n");print(bal$overall)
match.stats <- list(match.stats,BowlerHanson=bal$overall)
balancepost<- list(president=match.stats)  #reported in table 1 of the paper!
pdf(file="figure-balance2014BEPS-post.pdf")
plot(bal,thecols=1,legend=F,mar=c(3,2,.5,.5),xlim=c(-0.3,.3),xaxt="n")
abline(v=c(-.25,.25),lty=2)
dev.off()

### Assemble matched set
matched.data <- match.data(matching)
matched.data$dilma <- as.numeric(as.character(matched.data$dilma))
matched.data$renda <- factor(matched.data$renda)

########################################################################################
# 
#  Analyzing the matched data set
#  Linear probability models with bootstrapped standard errors and simple logit 
#  See http://cran.r-project.org/doc/contrib/Fox-Companion/appendix-bootstrapping.pdf
#
#######################################################################################

fit1 <- zelig(dilma~bf, model="logit",weights=matched.data$weights,data=matched.data,cite=F)
    bf0 <- setx(fit1 , bf=0)
    bf1 <- setx(fit1 ,bf=1)
	out <- sim(fit1,x=bf0,x1=bf1)
	
logit.2014 <- c('NonBenef'=out$stats[[1]][1],
				'DiffBenef'=out$stats[[5]][1],
				'BootSe'=out$stats[[5]][2],
				'BootCi'=out$stats[[5]][4:5])
				
fit2m <- lm(dilma~bf,data=matched.data,weights=matched.data$weights)
boot.fct <- function(x,i){##function to bootstrap standard errors of "bf"
	tmp <- lm(dilma ~  bf ,data=x[i,],weights=x$weights)
	out <- coefficients(summary(tmp))["bfTRUE","Estimate"]
	return(out)
}

tmp <- boot(matched.data, boot.fct, R=1000,sim = "ordinary")
boot.se <- sd(tmp$t)
boot.ci <- quantile(tmp$t,prob=c(0.025,0.975))

lpm.bootse.2014 <- c('NonBenef'=as.numeric(fit2m$coef[1]),
				'DiffBenef'=as.numeric(fit2m$coef[2]),
				'BootSe'=as.numeric(boot.se),
				'BootCi'=as.numeric(boot.ci))

## These are the values reported for 2014 post electoral in Figure 1		
output2014post <- list(lpm=lpm.bootse.2014,logit=logit.2014)



######################################################################################
####																			  ####
#### Matching and balance anlaysis	(using WAVE A ONLY, preelectoral)      		  ####
#### 	                                            							  ####
######################################################################################

### Use pre-election vote intention and drop post-election from the set
tmp <- new.data
tmp$dilma <- tmp$dilma.pre
tmp$dilma.post <- NULL
tmp <- na.omit(tmp)
cat("Analysis on pre electoral recall using",nrow(tmp),"of the original",nrow(new.data),"observations\n")


tmp$ps <- ps <- fitted(lm(bf~sexo+idade+escola+
				renda+metropolitan+race+region+munsize+hdi.2010,data=tmp))
match.formula <- formula("bf~sexo+idade+escola+renda+metropolitan+race+region+munsize+ps")
matching <- matchit(match.formula
                   ,data=tmp,method="genetic",replace=T,ratio=1) 
                   

### Plot matching reuslts #########
pdf(file="figure-balimprove2014BEPS-pre.pdf")
plot(summary(matching,standardize=TRUE),interactive=F)
dev.off()

### Compute and save matching statistics ###
match.stats <- cbind(
	summary(matching)$sum.all[,1:2],
	summary(matching)$sum.matched[,1:2],
	Reduction=summary(matching)$reduction[,1]
)   

#Matching was done with replacement, so weights are different for eaach observation
#xBalance doesn't take weights, so use work-arournd to compute Bowler&Hansosn stats
forbal <- rbind(tmp[matching$match.matrix[,1],],	
			tmp[rownames(matching$match.matrix),])
bal<-xBalance(match.formula,data=forbal,report="all")
cat("Bowler and Hanson omnibus test\n");print(bal$overall)
match.stats <- list(match.stats,BowlerHanson=bal$overall)
balancepre<- list(president=match.stats)  #reported in table 1 of the paper!
pdf(file="figure-balance2014BEPS-pre.pdf")
plot(bal,thecols=1,legend=F,mar=c(3,2,.5,.5),xlim=c(-0.3,.3),xaxt="n")
abline(v=c(-.25,.25),lty=2)
dev.off()

### Assemble matched set
matched.data <- match.data(matching)
matched.data$dilma <- as.numeric(as.character(matched.data$dilma))
matched.data$renda <- factor(matched.data$renda)

########################################################################################
#
#  Linear probability models with bootstrapped standard errors and simple logit 
#  See http://cran.r-project.org/doc/contrib/Fox-Companion/appendix-bootstrapping.pdf
#
#######################################################################################
fit1 <- zelig(dilma ~   bf, model="logit",weights=matched.data$weights,data=matched.data,cite=F)
    bf0 <- setx(fit1 , bf=0)
    bf1 <- setx(fit1 ,bf=1)
	out <- sim(fit1,x=bf0,x1=bf1)
	
logit.2014 <- c('NonBenef'=out$stats[[1]][1],
				'DiffBenef'=out$stats[[5]][1],
				'BootSe'=out$stats[[5]][2],
				'BootCi'=out$stats[[5]][4:5])
				
fit2m <- lm(dilma ~  bf ,data=matched.data,weights=matched.data$weights)
boot.fct <- function(x,i){##function to bootstrap standard errors of "bf"
	tmp <- lm(dilma ~  bf ,data=x[i,],weights=x$weights)
	out <- coefficients(summary(tmp))["bfTRUE","Estimate"]
	return(out)
}

tmp <- boot(matched.data, boot.fct, R=1000,sim = "ordinary")
boot.se <- sd(tmp$t)
boot.ci <- quantile(tmp$t,prob=c(0.025,0.975))

lpm.bootse.2014 <- c('NonBenef'=as.numeric(fit2m$coef[1]),
				'DiffBenef'=as.numeric(fit2m$coef[2]),
				'BootSe'=as.numeric(boot.se),
				'BootCi'=as.numeric(boot.ci))


## These are the values reported for 2014 pre electoral in Figure 1						
output2014pre <- list(lpm=lpm.bootse.2014,logit=logit.2014)
